For finding lane in an image, we apply following procedure to image:
Finally, we demonstrate the effectiveness of the above approach on a sample video. In the following sections we will go through each of the above-mentioned bullet points.
For calibrating the camera we use the cv2.calibrateCamera method. This method identifies camera parameters that can be used for undistortion, using a mapping of points from logical space to image space. We identify 9 × 6 image points using cv2.findChessboardCorners from the calibration images of a chessboard provided in the project resources.
Sample chessboard image and undistorted image is provided below.
import numpy as np
import cv2
import glob
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from moviepy.editor import VideoFileClip
%matplotlib inline
#%matplotlib qt
class CameraHelper:
    """Calibrates the camera from chessboard images and undistorts frames.

    Calibration uses the 9x6 inner-corner chessboard images named
    ``calibration*.jpg`` found under ``calibration_image_dir``.
    """
    def __init__(self, calibration_image_dir):
        # Logical 3-D object points for a 9x6 chessboard lying in the z=0 plane.
        objp = np.zeros((6*9, 3), np.float32)
        objp[:, :2] = np.mgrid[0:9, 0:6].T.reshape(-1, 2)
        objpoints = []  # 3-D chessboard-space points, one entry per usable image
        imgpoints = []  # matching 2-D corner detections in image space
        images = glob.glob(calibration_image_dir + 'calibration*.jpg')
        img_shape = ()
        for idx, fname in enumerate(images):
            img = mpimg.imread(fname)
            img_shape = (img.shape[1], img.shape[0])  # (width, height) as cv2 expects
            gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
            ret, corners = cv2.findChessboardCorners(gray, (9, 6), None)
            if ret:  # skip images where the full corner grid was not found
                objpoints.append(objp)
                imgpoints.append(corners)
        # mtx (camera matrix) and dist (distortion coefficients) are what
        # Undistort needs; rvecs/tvecs are kept for completeness.
        self.ret, self.mtx, self.dist, self.rvecs, self.tvecs = cv2.calibrateCamera(
            objpoints, imgpoints, img_shape, None, None)

    def Undistort(self, img):
        """Return an undistorted copy of `img` using the stored calibration."""
        return cv2.undistort(img, self.mtx, self.dist, None, self.mtx)
# Calibrate once, up front, from the provided chessboard images.
cameraHelper = CameraHelper('./camera_cal/')
def test_undistortion():
    """Show a calibration image next to its undistorted version."""
    original = mpimg.imread('./camera_cal/calibration1.jpg')
    fig, (left_ax, right_ax) = plt.subplots(1, 2, figsize=(24, 9))
    fig.tight_layout()
    left_ax.imshow(original)
    left_ax.set_title('Original Image', fontsize=50)
    right_ax.imshow(cameraHelper.Undistort(original))
    right_ax.set_title('Undistorted Image', fontsize=50)
    plt.subplots_adjust(left=0., right=1, top=0.9, bottom=0.)
test_undistortion()
We apply color-space and gradient thresholding using the Thresholder class. The color threshold keeps pixels satisfying `(H > hthresh[0]) & (H <= hthresh[1]) & (s_channel > self.threshhls[0]) & (s_channel <= self.threshhls[1])`, while gradient thresholding is done in the ApplySobelThreshold method. For combining, instead of a logical '&' of the color and Sobel thresholds, we use the following scheme: ` sobeled = self.ApplySobelThreshold(undistorted) colored = self.ApplyColorThreshold(undistorted)
combined = np.zeros_like(colored)
combined[(sobeled == 1)] = 60
combined[(colored == 1)] += 180
` This scheme minimizes noise that may be generated by gradient while rewarding commonality between gradient and color space thresholds.
class Thresholder:
    """Produces a lane-pixel image by combining gradient and HLS color thresholds."""
    def __init__(self,
                 sobel_kernel=15,
                 threshx=(20, 100),
                 threshy=(20, 100),
                 threshmag=(40, 120),
                 threshdir=(0.77, 1.6),
                 threshhls=(90, 255)):
        # All thresholds are (low, high) ranges on the scaled 0-255 values
        # (threshdir is in radians).
        self.sobel_kernel = sobel_kernel
        self.threshx = threshx      # scaled |d/dx| range
        self.threshy = threshy      # scaled |d/dy| range
        self.threshmag = threshmag  # scaled gradient-magnitude range
        self.threshdir = threshdir  # gradient-direction range, radians
        self.threshhls = threshhls  # S-channel range for color thresholding

    def ApplySobelThreshold(self, img):
        """Binary image: 1 where (x AND y) or (magnitude AND direction) pass."""
        gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
        sobelx = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=self.sobel_kernel)
        sobely = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=self.sobel_kernel)
        # Scale each derivative to 0-255 so thresholds are image-independent.
        absx = np.absolute(sobelx)
        x = np.uint8(255 * absx / np.max(absx))
        absy = np.absolute(sobely)
        y = np.uint8(255 * absy / np.max(absy))
        mag = np.sqrt(sobelx**2 + sobely**2)
        mag = (mag * 255 / np.max(mag)).astype(np.uint8)
        grad_dir = np.arctan2(sobely, sobelx)  # renamed from `dir` (shadowed a builtin)
        binary = np.zeros_like(gray)
        binary[(((x >= self.threshx[0]) & (x <= self.threshx[1])) & ((y >= self.threshy[0]) & (y <= self.threshy[1])))
               | (((mag >= self.threshmag[0]) & (mag <= self.threshmag[1])) & ((grad_dir >= self.threshdir[0]) & (grad_dir <= self.threshdir[1])))
               ] = 1
        return binary

    def ApplyColorThreshold(self, img):
        """Binary image: 1 where hue and saturation both fall in range."""
        hls = cv2.cvtColor(img, cv2.COLOR_RGB2HLS)
        s_channel = hls[:, :, 2]
        H = hls[:, :, 0]
        hthresh = (15, 100)  # fixed hue window; pair with the S-channel range
        binary_output = np.zeros_like(s_channel)
        # (removed an unused `binary = np.zeros_like(H)` dead local)
        binary_output[(H > hthresh[0]) & (H <= hthresh[1]) & (s_channel > self.threshhls[0]) & (s_channel <= self.threshhls[1])] = 1
        return binary_output

    def ApplySobelAndColorThreshold(self, undistorted):
        """Combine thresholds: 60 = gradient only, 180 = color only, 240 = both."""
        sobeled = self.ApplySobelThreshold(undistorted)
        colored = self.ApplyColorThreshold(undistorted)
        combined = np.zeros_like(colored)
        combined[(sobeled == 1)] = 60
        combined[(colored == 1)] += 180
        return combined
# Shared instance with the default (hand-tuned) thresholds.
thresholder = Thresholder()
Note images 3, 7, 8 and 9; these show how gradient noise is reduced in combined image.
def test_on_images():
    """Plot original, gradient, color, and combined thresholds for each test image."""
    for fname in glob.glob('./test_images/*.jpg'):
        img = mpimg.imread(fname)
        undist = cameraHelper.Undistort(img)
        fig, axes = plt.subplots(1, 4, figsize=(24, 9))
        panels = [
            (img, fname, None),
            (thresholder.ApplySobelThreshold(undist), 'Gradient threshold', 'gray'),
            (thresholder.ApplyColorThreshold(undist), 'HLS threshold', 'gray'),
            (thresholder.ApplySobelAndColorThreshold(undist), 'Combined', 'gray'),
        ]
        for ax, (panel_img, title, cmap) in zip(axes, panels):
            ax.imshow(panel_img, cmap=cmap)
            ax.set_title(title)
test_on_images()
The PerspectiveTransformer class provides ToBirdsEyeView and ToDashCamView methods for transforming to and from the bird's-eye perspective. I manually identified the lane points [595, 444], [695, 444], [1276, 719], [125, 719] in straight_lines2.jpg, which should transform to straight vertical lines in the perspective transform. The points are chosen from a straight-lane image so that the transform maps the lane boundaries to two parallel vertical lines.
# Hand-picked trapezoid (TL, TR, BR, BL) bounding the lane in straight_lines2.jpg.
lanePoints = np.array([[595, 444], [695, 444], [1276, 719], [125, 719]], np.float32)
class PerspectiveTransformer():
    """Maps between the dash-cam view and a top-down (bird's-eye) view."""
    def __init__(self):
        # Derive the destination rectangle from an undistorted straight-lane
        # image so the hand-picked trapezoid maps to two vertical lines
        # 600 px apart, centered horizontally.
        img = mpimg.imread('./test_images/straight_lines2.jpg')
        img = cameraHelper.Undistort(img)
        midx = (int)(img.shape[1] / 2)
        dst = np.float32([[midx - 300, 0],
                          [midx + 300, 0],
                          [midx + 300, 719],
                          [midx - 300, 719]])
        self.M = cv2.getPerspectiveTransform(lanePoints, dst)
        self.InvM = cv2.getPerspectiveTransform(dst, lanePoints)

    def ToBirdsEyeView(self, img):
        """Warp a dash-cam image to the top-down view."""
        return cv2.warpPerspective(img, self.M, (img.shape[1], img.shape[0]), flags=cv2.INTER_NEAREST)

    def ToDashCamView(self, img):
        """Warp a top-down image back to the dash-cam view."""
        return cv2.warpPerspective(img, self.InvM, (img.shape[1], img.shape[0]), flags=cv2.INTER_NEAREST)
# Shared transformer; computes M/InvM once from the reference image.
perspectiveTransformer = PerspectiveTransformer()
The following image plots show the chosen trapezoid in green in the original image and its bird's-eye view in the adjacent image.
def ShowPerspectiveTransformation(axs, fpath):
    """For each image matching fpath, draw the source trapezoid and its warp."""
    for idx, fname in enumerate(glob.glob(fpath)):
        undist = cameraHelper.Undistort(mpimg.imread(fname))
        # Shade the hand-picked lane trapezoid in translucent green.
        overlay = np.zeros_like(undist)
        cv2.fillPoly(overlay, np.int32([lanePoints]), (0,255, 0))
        undist = cv2.addWeighted(undist, 1, overlay, 0.3, 0)
        warped = perspectiveTransformer.ToBirdsEyeView(undist)
        row, col = int(idx / 2), (idx % 2) * 2
        axs[row][col].imshow(undist)
        axs[row][col + 1].imshow(warped, cmap = 'gray')
fig, axs = plt.subplots(4, 4, figsize=(24, 9))
ShowPerspectiveTransformation(axs, './test_images/*.jpg')
I began with Udacity's histogram-based lane finding code to implement LaneFinder, but customized it in the following ways: tuned the num_windows, margin and minpix parameters; seeded the search with `histogram = np.sum(binary_warped[int(5 * binary_warped.shape[0]/7):,:], axis=0)`; carried half of each window's previous shift forward as momentum when recentering; and let the window with more detected pixels drag the other lane's window along with it. LaneFinder also provides the methods GetOffset (for obtaining the vehicle position) and GetRadius (for obtaining the road curvature). From the bird's-eye view, I estimated the following parameters for calculating radius and vehicle position:
`ym_per_pix = 30/720`, `xm_per_pix = 3.7/489`
class LaneFinder():
def __init__(self):
    """Reset search state; the next FindLane call will do a blind search."""
    self.nwindows = 13   # number of sliding windows stacked vertically
    self.margin = 30     # initial window half-width in pixels
    self.minpix = 15     # minimum pixels required to recenter a window
    self.left_fit = None
    self.right_fit = None
def FindLane(self, binary_warped):
    """Dispatch: blind histogram search until both fits exist, then targeted search."""
    have_fits = self.left_fit is not None and self.right_fit is not None
    if not have_fits:
        return self.FindLaneForFirstFrame(binary_warped)
    return self.FindLaneForNonFirstFrame(binary_warped)
def GetOffset(self, img):
    """Vehicle offset in meters from lane center at the bottom image row.

    Returns a length-1 array; negative means the camera is left of center.
    """
    bottom_row = np.array([719])
    left_x = np.polyval(self.left_fit, bottom_row)
    right_x = np.polyval(self.right_fit, bottom_row)
    lane_center = (left_x + right_x) / 2
    # 3.7 m lane width spans ~489 px in the bird's-eye view.
    return ((img.shape[1] / 2) - lane_center) * 3.7 / 489
def GetRadius(self):
    """Return (left, right) curvature radii in meters at the image bottom."""
    # Pixel-to-meter conversion factors estimated from the bird's-eye view.
    ym_per_pix = 30 / 720   # meters per pixel, vertical
    xm_per_pix = 3.7 / 489  # meters per pixel, horizontal
    y_eval = 719.0  # bottom row of the 720-row image
    radii = []
    for ys, xs in ((self.lefty, self.leftx), (self.righty, self.rightx)):
        # Refit in world units instead of rescaling pixel-space coefficients.
        a, b, _ = np.polyfit(ys * ym_per_pix, xs * xm_per_pix, 2)
        radii.append(((1 + (2 * a * y_eval * ym_per_pix + b) ** 2) ** 1.5) / np.absolute(2 * a))
    return radii[0], radii[1]
def FindLaneForFirstFrame(self, binary_warped):
    """Blind lane search: histogram seed plus sliding windows.

    Stores pixel indices and second-order fits on self and returns an RGB
    visualization image with the search windows drawn in.
    (Fix: `np.int` was removed in NumPy 1.24; use the builtin `int`.)
    """
    # Output image for drawing the search windows.
    out_img = np.dstack((binary_warped, binary_warped, binary_warped))*255
    # Histogram over the bottom 2/7 of the image seeds the window positions.
    histogram = np.sum(binary_warped[int(5 * binary_warped.shape[0]/7):,:], axis=0)
    # Peaks of the left/right halves give the lane-base starting points.
    midpoint = int(histogram.shape[0]/2)
    leftx_base = np.argmax(histogram[:midpoint])
    rightx_base = np.argmax(histogram[midpoint:]) + midpoint
    window_height = int(binary_warped.shape[0]/self.nwindows)
    # x/y coordinates of every nonzero (lane-candidate) pixel.
    nonzero = binary_warped.nonzero()
    nonzeroy = np.array(nonzero[0])
    nonzerox = np.array(nonzero[1])
    leftx_current = leftx_base
    rightx_current = rightx_base
    self.left_lane_inds = []
    self.right_lane_inds = []
    margin = self.margin
    # Momentum: half of each window's last shift is carried into the next.
    last_left_delta = 0
    last_right_delta = 0
    for window in range(self.nwindows):
        # Window bounds in y (bottom-up) and x (centered on current estimates).
        win_y_low = binary_warped.shape[0] - (window+1)*window_height
        win_y_high = binary_warped.shape[0] - window*window_height
        win_xleft_low = leftx_current - margin
        win_xleft_high = leftx_current + margin
        win_xright_low = rightx_current - margin
        win_xright_high = rightx_current + margin
        # Nonzero pixels falling inside each window.
        good_left_inds = ((nonzeroy >= win_y_low) & (nonzeroy < win_y_high) & (nonzerox >= win_xleft_low) & (nonzerox < win_xleft_high)).nonzero()[0]
        good_right_inds = ((nonzeroy >= win_y_low) & (nonzeroy < win_y_high) & (nonzerox >= win_xright_low) & (nonzerox < win_xright_high)).nonzero()[0]
        self.left_lane_inds.append(good_left_inds)
        self.right_lane_inds.append(good_right_inds)
        # Recenter on the pixel mean plus half of the previous shift.
        new_leftx_current = leftx_current
        new_rightx_current = rightx_current
        if len(good_left_inds) > self.minpix:
            new_leftx_current = int(np.mean(nonzerox[good_left_inds])) + int(last_left_delta / 2)
        if len(good_right_inds) > self.minpix:
            new_rightx_current = int(np.mean(nonzerox[good_right_inds])) + int(last_right_delta / 2)
        del_left_x = new_leftx_current - leftx_current
        del_right_x = new_rightx_current - rightx_current
        # The side with more pixels "wins" and drags the other side along;
        # the dragged window is drawn in darker green.
        LCOLORVAL = 255
        RCOLORVAL = 255
        if len(good_left_inds) > len(good_right_inds):
            new_rightx_current += del_left_x
            RCOLORVAL = 124
        elif len(good_right_inds) > len(good_left_inds):
            new_leftx_current += del_right_x
            LCOLORVAL = 124
        cv2.rectangle(out_img,(win_xleft_low,win_y_low),(win_xleft_high,win_y_high), (0,LCOLORVAL,0), 4)
        cv2.rectangle(out_img,(win_xright_low,win_y_low),(win_xright_high,win_y_high), (0,RCOLORVAL,0), 4)
        last_left_delta = new_leftx_current - leftx_current
        last_right_delta = new_rightx_current - rightx_current
        leftx_current = new_leftx_current
        rightx_current = new_rightx_current
        margin += 2  # widen windows toward the top where perspective spreads lanes
    # Flatten the per-window index lists and extract pixel coordinates.
    self.left_lane_inds = np.concatenate(self.left_lane_inds)
    self.right_lane_inds = np.concatenate(self.right_lane_inds)
    self.leftx = nonzerox[self.left_lane_inds]
    self.lefty = nonzeroy[self.left_lane_inds]
    self.rightx = nonzerox[self.right_lane_inds]
    self.righty = nonzeroy[self.right_lane_inds]
    # Second-order fits x = f(y) for each lane.
    self.left_fit = np.polyfit(self.lefty, self.leftx, 2)
    self.right_fit = np.polyfit(self.righty, self.rightx, 2)
    return out_img
def FindLaneForNonFirstFrame(self, binary_warped):
    """Refit both lane polynomials by searching around the previous-frame fits."""
    nonzero = binary_warped.nonzero()
    nz_y = np.array(nonzero[0])
    nz_x = np.array(nonzero[1])
    margin = self.margin

    def band_mask(fit):
        # Pixels within +/- margin of the polynomial evaluated at each pixel's row.
        center = fit[0] * (nz_y ** 2) + fit[1] * nz_y + fit[2]
        return (nz_x > center - margin) & (nz_x < center + margin)

    self.left_lane_inds = band_mask(self.left_fit)
    self.right_lane_inds = band_mask(self.right_fit)
    # Extract the selected pixel coordinates per lane.
    self.leftx = nz_x[self.left_lane_inds]
    self.lefty = nz_y[self.left_lane_inds]
    self.rightx = nz_x[self.right_lane_inds]
    self.righty = nz_y[self.right_lane_inds]
    # Second-order fits x = f(y), matching the first-frame search.
    self.left_fit = np.polyfit(self.lefty, self.leftx, 2)
    self.right_fit = np.polyfit(self.righty, self.rightx, 2)
def Visualize(self, binary_warped):
    """Render the fitted lanes over the warped binary image.

    Returns (overlay_image, ploty, left_fitx, right_fitx) where the fitted
    x positions are evaluated at every row in ploty.
    """
    nonzero = binary_warped.nonzero()
    nonzeroy = np.array(nonzero[0])
    nonzerox = np.array(nonzero[1])
    # One y value per image row; evaluate both fits along it.
    ploty = np.linspace(0, binary_warped.shape[0]-1, binary_warped.shape[0] )
    left_fitx = self.left_fit[0]*ploty**2 + self.left_fit[1]*ploty + self.left_fit[2]
    right_fitx = self.right_fit[0]*ploty**2 + self.right_fit[1]*ploty + self.right_fit[2]
    # Create an image to draw on and an image to show the selection window
    out_img = np.dstack((binary_warped, binary_warped, binary_warped))*255
    window_img = np.zeros_like(out_img)
    # Color left-lane pixels red and right-lane pixels blue.
    out_img[nonzeroy[self.left_lane_inds], nonzerox[self.left_lane_inds]] = [255, 0, 0]
    out_img[nonzeroy[self.right_lane_inds], nonzerox[self.right_lane_inds]] = [0, 0, 255]
    # Build closed polygons around each fit (+/- margin): the inner edge runs
    # top-to-bottom and the flipped outer edge bottom-to-top so fillPoly sees
    # a single closed contour.
    left_line_window1 = np.array([np.transpose(np.vstack([left_fitx-self.margin, ploty]))])
    left_line_window2 = np.array([np.flipud(np.transpose(np.vstack([left_fitx+self.margin, ploty])))])
    left_line_pts = np.hstack((left_line_window1, left_line_window2))
    right_line_window1 = np.array([np.transpose(np.vstack([right_fitx-self.margin, ploty]))])
    right_line_window2 = np.array([np.flipud(np.transpose(np.vstack([right_fitx+self.margin, ploty])))])
    right_line_pts = np.hstack((right_line_window1, right_line_window2))
    # Draw the search bands in green and blend them over the pixel image.
    cv2.fillPoly(window_img, np.int_([left_line_pts]), (0,255, 0))
    cv2.fillPoly(window_img, np.int_([right_line_pts]), (0,255, 0))
    return cv2.addWeighted(out_img, 1, window_img, 0.3, 0), ploty, left_fitx, right_fitx
# Shared detector; callers re-run __init__() to reset state between clips.
laneFinder = LaneFinder()
In the following images, the winning window is shown in bright green; this window's movement affects the other window for the current iteration. Notice the increasing window size toward the top of the image.
def _LaneFindTest(fname):
    """Run the full detection chain on one image; return intermediates for plotting."""
    raw = mpimg.imread(fname)
    undist = cameraHelper.Undistort(raw)
    binary = thresholder.ApplySobelAndColorThreshold(undist)
    warped = perspectiveTransformer.ToBirdsEyeView(binary)
    laneFinder.__init__()  # reset fit state so each image starts a blind search
    windows_img = laneFinder.FindLane(warped)
    overlay, ploty, left_fitx, right_fitx = laneFinder.Visualize(warped)
    return raw, windows_img, overlay, ploty, left_fitx, right_fitx
def LaneFindTest(fpattern):
    """Run lane finding on every image matching fpattern and plot the stages."""
    for fname in glob.glob(fpattern):
        img, birdsEye, result, ploty, left_fitx, right_fitx = _LaneFindTest(fname)
        fig, axes = plt.subplots(1, 3, figsize=(24, 9))
        axes[0].imshow(img)
        axes[0].set_title(fname)
        axes[1].imshow(birdsEye, cmap = 'gray')
        axes[1].set_title('Birds eye view')
        axes[2].imshow(result)
        axes[2].set_title('Lanes')
        # Overlay the fitted polynomials on the lane panel.
        axes[2].plot(left_fitx, ploty, color='yellow')
        axes[2].plot(right_fitx, ploty, color='yellow')
LaneFindTest('./test_images/*.jpg')
Here we combine all above image processing steps in one single function pipeline.
def drawLanes(image, undist, warped, ploty, left_fit, right_fit, lrad, rrad, offset):
    """Paint the detected lane area back onto the undistorted dash-cam frame,
    annotated with curvature radii and vehicle offset (offset is a length-1 array)."""
    # Blank bird's-eye canvas for the lane polygon.
    blank = np.zeros_like(warped).astype(np.uint8)
    color_warp = np.dstack((blank, blank, blank))
    left_fitx = np.polyval(left_fit, ploty)
    right_fitx = np.polyval(right_fit, ploty)
    # Left boundary top-to-bottom, right boundary flipped bottom-to-top,
    # so the two edges form one closed polygon for fillPoly.
    pts_left = np.array([np.transpose(np.vstack([left_fitx, ploty]))])
    pts_right = np.array([np.flipud(np.transpose(np.vstack([right_fitx, ploty])))])
    pts = np.hstack((pts_left, pts_right))
    cv2.fillPoly(color_warp, np.int_([pts]), (0,255, 0))
    # Warp back to the dash-cam perspective and blend over the frame.
    newwarp = perspectiveTransformer.ToDashCamView(color_warp)
    result = cv2.addWeighted(undist, 1, newwarp, 0.3, 0)
    txt = "Left Radius " + str(np.around(lrad, 2)) + "m, Right Radius " + str(np.around(rrad, 2)) + "m, Offset " + str(np.around(abs(offset), 2)[0]) + "m"
    if offset < 0:
        txt += " on left"
    elif offset > 0:
        txt += " on right"
    return cv2.putText(result, txt, (10, 100), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
def pipeline(img):
    """Full per-frame processing: undistort, threshold, warp, fit, annotate."""
    ploty = np.arange(400, 719, step=1)  # draw only the lower portion of the lane
    undist = cameraHelper.Undistort(img)
    binary = thresholder.ApplySobelAndColorThreshold(undist)
    warped = perspectiveTransformer.ToBirdsEyeView(binary)
    laneFinder.FindLane(warped)
    lrad, rrad = laneFinder.GetRadius()
    offset = laneFinder.GetOffset(img)
    return drawLanes(img, undist, warped, ploty,
                     laneFinder.left_fit, laneFinder.right_fit,
                     lrad, rrad, offset)
# Run the full pipeline over every test image and plot the results in a 2x4 grid.
images = glob.glob('./test_images/*.jpg')
fig, axs = plt.subplots(2, 4, figsize=(24, 9))
for idx, fname in enumerate(images):
    laneFinder.__init__()  # reset fit state between unrelated images
    img = mpimg.imread(fname)
    axs[int(idx / 4)][idx % 4].imshow(pipeline(img))
Lane finding was erroneous at t = 41.75s, moving on from t = 41s, in the sample video. Here I test its correctness.
def SampleImages():
    """Save the two video frames (t=41s and t=41.75s) where detection misbehaved."""
    testClip = VideoFileClip('./project_video.mp4')
    testClip.save_frame('./new_test_images/1.jpg', t=41)
    testClip.save_frame('./new_test_images/2.jpg', t=41.75)
    # NOTE(review): presumably closes moviepy's ffmpeg reader subprocesses
    # so they do not linger — confirm against the moviepy version in use.
    testClip.reader.close()
    testClip.audio.reader.close_proc()
SampleImages()
LaneFindTest('./new_test_images/*.jpg')
# Run the full pipeline on the two newly sampled problem frames.
fig, axs = plt.subplots(1, 2, figsize=(24, 9))
images = glob.glob('./new_test_images/*.jpg')
for idx, fname in enumerate(images):
    laneFinder.__init__()  # reset fit state between unrelated images
    img = mpimg.imread(fname)
    axs[idx].imshow(pipeline(img))
def testOnVideo(inFile, outFile):
    """Run the lane-finding pipeline over a whole video and write the result."""
    laneFinder.__init__()  # reset so the video starts with a blind search
    testClip = VideoFileClip(inFile)
    result = testClip.fl_image(pipeline)
    # IPython %time magic: reports how long the encode takes.
    %time result.write_videofile(outFile, audio=False)
    # NOTE(review): presumably closes moviepy's ffmpeg reader subprocesses —
    # confirm against the moviepy version in use.
    testClip.reader.close()
    testClip.audio.reader.close_proc()
testOnVideo('./project_video.mp4', './project_video_result.mp4')
I feel the radius of curvature is off due to the following reasons:
I think following improvements need to be made to lane finding: